olive = c("#34431A", "#637430", "#949F37", "#C0BF31", "#D8D466", "#EFEDB0")
green = c("#18391A", "#246D36", "#458D41", "#62AA46", "#9DC376", "#D8E3C3")
teal = c("#01354A", "#006474", "#14939A", "#4FB3B3", "#94CCCD", "#CBE2E8")
blue = c("#192C57", "#0A4B83", "#056CA7", "#5791C3", "#9CC3E2", "#C7E2F5")
purple = c("#46174D", "#76266D", "#A04A8B", "#B276AA", "#CEA4C8", "#E8D3E6")
red = c("#711311", "#972622", "#BD3B3B", "#D56362", "#E6A0A2", "#F5CCC8")
orange = c("#7F3218", "#AF4E24", "#E06B25", "#ED9646", "#F6BC7D", "#F9DCBC")
yellow = c("#685221", "#99752B", "#C69D2B", "#E3C24F", "#F1D687", "#FBEDC0")
mud = c("#615C48", "#858062", "#A7A083", "#C6BFA3", "#E4DDCA", "#F6F3EC")
grey = c("#1F2E43", "#47536B", "#6D788C", "#98A1B0", "#C8CCD8", "#E4E3E9")
skin = c("#422B1D", "#744D3C", "#906753", "#BC9578", "#DBB9A0", "#F5E2D3")
fill_colors <- c(
# 4d cyto
"4d_CNT_cyto_1" = olive[2], "4d_OPTN_cyto_1" = orange[2],
"4d_CNT_cyto_2" = olive[2], "4d_OPTN_cyto_2" = orange[2],
"4d_CNT_cyto_3" = olive[2], "4d_OPTN_cyto_3" = orange[2],
# 4d synap
"4d_CNT_synap_1" = green[2], "4d_OPTN_synap_1" = red[2],
"4d_CNT_synap_2" = green[2], "4d_OPTN_synap_2" = red[2],
"4d_CNT_synap_3" = green[2], "4d_OPTN_synap_3" = red[2],
# 5d synap
"5d_CNT_cyto_1" = olive[3], "5d_OPTN_cyto_1" = orange[3],
"5d_CNT_cyto_2" = olive[3], "5d_OPTN_cyto_2" = orange[3],
"5d_CNT_cyto_3" = olive[3], "5d_OPTN_cyto_3" = orange[3],
# 5d synap
"5d_CNT_synap_1" = green[3], "5d_OPTN_synap_1" = red[3],
"5d_CNT_synap_2" = green[3], "5d_OPTN_synap_2" = red[3],
"5d_CNT_synap_3" = green[3], "5d_OPTN_synap_3" = red[3],
# 6d synap
"6d_CNT_cyto_1" = olive[4], "6d_OPTN_cyto_1" = orange[4],
"6d_CNT_cyto_2" = olive[4], "6d_OPTN_cyto_2" = orange[4],
"6d_CNT_cyto_3" = olive[4], "6d_OPTN_cyto_3" = orange[4],
# 6d synap
"6d_CNT_synap_1" = green[4], "6d_OPTN_synap_1" = red[4],
"6d_CNT_synap_2" = green[4], "6d_OPTN_synap_2" = red[4],
"6d_CNT_synap_3" = green[4], "6d_OPTN_synap_3" = red[4]
)
QCfill = grey[3]
screefill = grey[3]
downfill = purple[1]
upfill = blue[1]
nsfill = grey[5]
vennfillup = c("4d up cyto" = blue[5], "4d up synap" = teal[5],
"5d up cyto" = blue[6], "5d up synap" = teal[6])
vennfilldown = c("4d down cyto" = purple[5], "4d down synap" = yellow[5],
"5d down cyto" = purple[6], "5d down synap" = yellow[6])
options(repos = c(CRAN = "https://cloud.r-project.org/"))
required_pkgs <- c("tidyverse",
"tidyr",
"dplyr",
"BiocManager",
"readr",
"stringr",
"plotly",
"ggplot2",
"patchwork",
"grid",
"knitr",
"sysfonts",
"showtext",
"htmlwidgets",
"UpSetR",
"eulerr",
"tibble",
"purrr",
"ggrepel")
bioc_pkgs <- c("limma",
"impute",
"UniProt.ws",
"pcaMethods",
"orthogene")
#CRAN packages
for (pkg in required_pkgs) {
if (!requireNamespace(pkg, quietly = TRUE)) {
install.packages(pkg, dependencies = TRUE)
}
library(pkg, character.only = TRUE)
}
#Bioconductor packages
if (!requireNamespace("BiocManager", quietly = TRUE)) {
install.packages("BiocManager")
}
for (pkg in bioc_pkgs) {
if (!requireNamespace(pkg, quietly = TRUE)) {
BiocManager::install(pkg, ask = FALSE, update = FALSE)
}
library(pkg, character.only = TRUE)
}
##
## Es gibt eine Binärversion, jedoch ist der Quelltext neuer:
## binary source needs_compilation
## orthogene 1.16.0 1.16.1 FALSE
raw <- read.delim("4d_5d_normalized_prot.tsv", sep = "\t", header = TRUE)
all_cols <- colnames(raw)
parsed <- purrr::map_dfr(all_cols, parse_colname)
raw_renamed <- raw
idx <- match(parsed$old_name, names(raw_renamed))
names(raw_renamed)[idx] <- parsed$new_name
anno <- annotate_proteins_uniprot(raw_df = raw_renamed,
tax_id = 10090,
remove_suffix = "_MOUSE$",
proteinID = "PG.ProteinAccessions")
data <- anno$data
mapping <- anno$mapping # optional backup / diagnostics
# GLOBAL
global_clean <- function(data) {
message("Handling duplicate / missing gene names\n")
duplicates <- data[duplicated(data$Gene.Names) |
duplicated(data$Gene.Names, fromLast = TRUE), ]
duplicated_genes <- unique(data$Gene.Names[duplicated(data$Gene.Names)])
message("This dataset contains ", nrow(duplicates),
" duplicates in Gene.Names\n")
if (length(duplicated_genes) > 0) {
message("Duplicated gene names: ",
paste(duplicated_genes, collapse = ", "), "\n")
}
ix <- as.integer(rownames(duplicates))
data$Gene.Names[ix] <- data$Entry.Name[ix]
rownames(data) <- data$Gene.Names
message("-------------------------------------------------------------")
message("This dataset contains ", nrow(data), " proteins\n")
message("-------------------------------------------------------------\n")
message("Removing contaminations\n")
keratins <- grep("keratin", data$descriptions, ignore.case = TRUE)
if (length(keratins) > 0) data <- data[-keratins, ]
message(length(keratins), " keratin proteins removed\n")
album <- grep("album", data$descriptions, ignore.case = TRUE)
if (length(album) > 0) data <- data[-album, ]
message(length(album), " albumin proteins removed\n")
trypsin <- grep("trypsin", data$descriptions, ignore.case = TRUE)
if (length(trypsin) > 0) data <- data[-trypsin, ]
message(length(trypsin), " trypsin proteins removed\n\n")
return(data)
}
# SUBSETS
clean_subset <- function(data,
batch,
fraction,
metric = "MS1",
namecols = c("Entry.Name",
"Gene.Names",
"descriptions",
"PG.Pvalue",
"PG.Qvalue",
"PG.Cscore",
"proteinID")) {
key <- paste(batch, fraction, sep = "_")
message("=== Processing subset: ", key, " ===\n")
# 1) identify MS1 columns for this subset (this is the subset definition)
pattern_ms1 <- paste0("^", batch, "_.*_", fraction, "_[0-9]+_", metric, "$")
ms1_cols <- grep(pattern_ms1, colnames(data), value = TRUE)
if (length(ms1_cols) == 0L) {
stop("No MS1 columns found for subset ", key)
}
# derive matching MS2 / RunEv / Precursor columns
base <- sub(paste0("_", metric, "$"), "", ms1_cols)
ms2_cols <- paste0(base, "_MS2")
runev_cols <- paste0(base, "_RunEvCount")
precursor_cols <- paste0(base, "_PrecursorQuant")
ms2_cols <- intersect(ms2_cols, colnames(data))
runev_cols <- intersect(runev_cols, colnames(data))
precursor_cols <- intersect(precursor_cols, colnames(data))
# 2) create the actual subset data frame:
# all rows, only metadata + cols for this subset
subset_cols <- c(namecols, ms1_cols, ms2_cols, runev_cols, precursor_cols)
subset_cols <- intersect(subset_cols, colnames(data))
data <- data[, subset_cols, drop = FALSE]
# 3) statistical filters (only within this subset)
message("Removing proteins with low statistical power in subset ", key, "\n")
Pvalue <- data %>% filter(PG.Pvalue < 0.05)
message(nrow(data) - nrow(Pvalue),
" proteins with p >= 0.05 removed\n")
Qvalue <- Pvalue %>% filter(PG.Qvalue < 0.01)
message(nrow(Pvalue) - nrow(Qvalue),
" proteins with Q >= 0.01 removed\n")
Cscore <- Qvalue %>% filter(PG.Cscore >= 3)
message(nrow(Qvalue) - nrow(Cscore),
" proteins with C-score < 3 removed\n")
evid <- Cscore %>%
filter(rowSums(.[, runev_cols] >= 2, na.rm = TRUE) > 0)
message(nrow(Cscore) - nrow(evid),
" proteins with run evidence counts < 2 removed\n")
prec <- evid %>%
filter(rowSums(.[, precursor_cols] >= 2, na.rm = TRUE) > 0)
message(nrow(evid) - nrow(prec),
" proteins with < 2 precursors removed\n")
#intensity_threshold <- quantile(as.matrix(prec[, ms1_cols]),
# 0.05, na.rm = TRUE)
#intensities <- prec %>%
# filter(rowSums(.[, ms1_cols] >= intensity_threshold,
# na.rm = TRUE) > 0)
#message(nrow(prec) - nrow(intensities),
# " proteins with MS1 in lower 5% removed\n\n")
data <- prec
# 3) set intensities to numeric
data[, c(ms1_cols, ms2_cols)] <- lapply(
data[, c(ms1_cols, ms2_cols), drop = FALSE],
as.numeric
)
data_full_cleaned <- data
# 4) subset to Gene.Names + MS1, rename columns
message("Subsetting Gene.Names and MS1 columns for ", key, "\n")
data_sub <- data %>%
dplyr::select(Gene.Names, all_of(ms1_cols))
colnames(data_sub)[match(ms1_cols, colnames(data_sub))] <-
sub(paste0("_", metric, "$"), "", ms1_cols)
ms1_base <- setdiff(colnames(data_sub), "Gene.Names")
data_clean_raw <- data_sub
# 5) log2 transform
message("Log2 transform MS1 intensities\n\n")
data_sub[, ms1_base] <- log2(pmax(data_sub[, ms1_base], 1))
data_pca <- data_sub %>%
dplyr::select(all_of(ms1_base))
# 6) bpca imputation
message("Bayesian PCA imputation for ", key, "\n")
bpca_impute <- function(df, nPcs = 6) {
mat <- as.matrix(df)
fit <- pca(mat, method = "bpca", nPcs = nPcs)
completeObs(fit)
}
data_bpca <- bpca_impute(data_pca, nPcs = 5)
data_pca_mat <- t(data_bpca)
# 7) pivot long for QC plots
message("Pivot cleaned dataset to long format for ", key, "\n")
data_piv <- data_sub %>%
dplyr::select(-Gene.Names) %>%
tibble::rownames_to_column(var = "gene_names") %>%
tidyr::pivot_longer(
cols = -gene_names,
names_to = "sample",
values_to = "intensities"
)
results <- list(
batch = batch,
fraction = fraction,
data_full_cleaned = data_full_cleaned,
data_clean_raw = data_clean_raw,
data_bpca = data_bpca,
data_pca_mat = data_pca_mat,
data_piv = data_piv
)
return(results)
}
## Handling duplicate / missing gene names
## This dataset contains 28 duplicates in Gene.Names
## Duplicated gene names:
## -------------------------------------------------------------
## This dataset contains 7836 proteins
## -------------------------------------------------------------
## Removing contaminations
## 20 keratin proteins removed
## 1 albumin proteins removed
## 2 trypsin proteins removed
## === Processing subset: 5d_cyto ===
## Removing proteins with low statistical power in subset 5d_cyto
## 45 proteins with p >= 0.05 removed
## 0 proteins with Q >= 0.01 removed
## 0 proteins with C-score < 3 removed
## 408 proteins with run evidence counts < 2 removed
## 75 proteins with < 2 precursors removed
## Subsetting Gene.Names and MS1 columns for 5d_cyto
## Log2 transform MS1 intensities
## Bayesian PCA imputation for 5d_cyto
## Pivot cleaned dataset to long format for 5d_cyto
## === Processing subset: 5d_synap ===
## Removing proteins with low statistical power in subset 5d_synap
## 45 proteins with p >= 0.05 removed
## 0 proteins with Q >= 0.01 removed
## 0 proteins with C-score < 3 removed
## 667 proteins with run evidence counts < 2 removed
## 92 proteins with < 2 precursors removed
## Subsetting Gene.Names and MS1 columns for 5d_synap
## Log2 transform MS1 intensities
## Bayesian PCA imputation for 5d_synap
## Pivot cleaned dataset to long format for 5d_synap
## === Processing subset: 4d_cyto ===
## Removing proteins with low statistical power in subset 4d_cyto
## 45 proteins with p >= 0.05 removed
## 0 proteins with Q >= 0.01 removed
## 0 proteins with C-score < 3 removed
## 448 proteins with run evidence counts < 2 removed
## 70 proteins with < 2 precursors removed
## Subsetting Gene.Names and MS1 columns for 4d_cyto
## Log2 transform MS1 intensities
## Bayesian PCA imputation for 4d_cyto
## Pivot cleaned dataset to long format for 4d_cyto
## === Processing subset: 4d_synap ===
## Removing proteins with low statistical power in subset 4d_synap
## 45 proteins with p >= 0.05 removed
## 0 proteins with Q >= 0.01 removed
## 0 proteins with C-score < 3 removed
## 566 proteins with run evidence counts < 2 removed
## 80 proteins with < 2 precursors removed
## Subsetting Gene.Names and MS1 columns for 4d_synap
## Log2 transform MS1 intensities
## Bayesian PCA imputation for 4d_synap
## Pivot cleaned dataset to long format for 4d_synap
The subset datasets now contain
batch and
fraction: metadata scalars
(e.g. "5d",
"cyto").
data_full_cleaned: data frame after
all statistical filters (P<0.05, Q<0.01,
C-score≥3, RunEv≥2, precursors≥2, MS1>5th percentile). Contains
metadata columns + MS1/MS2/RunEv/Precursor columns for this subset
only.
data_clean_raw: data frame with
only Gene.Names + MS1
intensity columns for this subset. Columns renamed without
_MS1 suffix
(e.g. 5d_CNT_cyto_1 instead of
5d_CNT_cyto_1_MS1). Still raw intensities
(pre-log2).
data_bpca: matrix
after log2 transformation + bPCA imputation. Shape: proteins
(rows) × samples (columns).
data_pca_mat: transposed
matrix for PCA. Shape: samples (rows) × proteins
(columns).
data_piv: long
tibble for QC plots and limma.
norm_5d_cyto <- median_sweep_pipeline(data_5d_cyto)
## Median sweep normalization for 5d_cyto
norm_5d_synap <- median_sweep_pipeline(data_5d_synap)
## Median sweep normalization for 5d_synap
norm_4d_cyto <- median_sweep_pipeline(data_4d_cyto)
## Median sweep normalization for 4d_cyto
norm_4d_synap <- median_sweep_pipeline(data_4d_synap)
## Median sweep normalization for 4d_synap
qc_5d_cyto <- plot_qc(
data_piv_pre = data_5d_cyto$data_piv,
data_piv_post = norm_5d_cyto$data_piv_swept,
title = "5d cyto"
)
qc_5d_cyto
qc_5d_synap <- plot_qc(
data_piv_pre = data_5d_synap$data_piv,
data_piv_post = norm_5d_synap$data_piv_swept,
title = "5d synap"
)
qc_5d_synap
qc_4d_cyto <- plot_qc(
data_piv_pre = data_4d_cyto$data_piv,
data_piv_post = norm_4d_cyto$data_piv_swept,
title = "4d cyto"
)
qc_4d_cyto
qc_4d_synap <- plot_qc(
data_piv_pre = data_4d_synap$data_piv,
data_piv_post = norm_4d_synap$data_piv_swept,
title = "4d synap"
)
qc_4d_synap
pca_5d_cyto <- plot_pca_comparison(
data_pca_pre = data_5d_cyto$data_pca_mat,
data_pca_post = norm_5d_cyto$data_pca_swept,
title = "5d cyto"
)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6
## Standard deviation 51.0163 46.3455 31.5090 28.8567 26.62518 3.42e-13
## Proportion of Variance 0.3573 0.2948 0.1363 0.1143 0.09731 0.00e+00
## Cumulative Proportion 0.3573 0.6521 0.7884 0.9027 1.00000 1.00e+00
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6
## Standard deviation 55.1355 37.9686 31.6689 30.8974 29.0844 1.187e-13
## Proportion of Variance 0.4173 0.1979 0.1377 0.1310 0.1161 0.000e+00
## Cumulative Proportion 0.4173 0.6152 0.7528 0.8839 1.0000 1.000e+00
pca_5d_cyto
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
pca_5d_synap <- plot_pca_comparison(
data_pca_pre = data_5d_synap$data_pca_mat,
data_pca_post = norm_5d_synap$data_pca_swept,
title = "5d synap"
)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6
## Standard deviation 52.5878 46.6017 27.3024 26.43207 25.05444 2.855e-13
## Proportion of Variance 0.3946 0.3099 0.1064 0.09968 0.08956 0.000e+00
## Cumulative Proportion 0.3946 0.7044 0.8108 0.91044 1.00000 1.000e+00
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6
## Standard deviation 53.9260 38.9400 30.5069 29.3260 28.1777 1.086e-13
## Proportion of Variance 0.4149 0.2163 0.1328 0.1227 0.1133 0.000e+00
## Cumulative Proportion 0.4149 0.6312 0.7640 0.8867 1.0000 1.000e+00
pca_5d_synap
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
pca_4d_cyto <- plot_pca_comparison(
data_pca_pre = data_4d_cyto$data_pca_mat,
data_pca_post = norm_4d_cyto$data_pca_swept,
title = "4d cyto"
)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6
## Standard deviation 65.4666 35.5883 26.06473 23.83509 21.21598 2.835e-13
## Proportion of Variance 0.5911 0.1747 0.09371 0.07836 0.06209 0.000e+00
## Cumulative Proportion 0.5911 0.7659 0.85955 0.93791 1.00000 1.000e+00
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6
## Standard deviation 51.8778 37.8725 35.3342 32.2467 28.9140 1.303e-13
## Proportion of Variance 0.3712 0.1978 0.1722 0.1434 0.1153 0.000e+00
## Cumulative Proportion 0.3712 0.5690 0.7413 0.8847 1.0000 1.000e+00
pca_4d_cyto
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
pca_4d_synap <- plot_pca_comparison(
data_pca_pre = data_4d_synap$data_pca_mat,
data_pca_post = norm_4d_synap$data_pca_swept,
title = "4d synap"
)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6
## Standard deviation 48.5087 38.9431 36.0476 32.6563 29.7738 5.214e-13
## Proportion of Variance 0.3304 0.2129 0.1825 0.1497 0.1245 0.000e+00
## Cumulative Proportion 0.3304 0.5433 0.7258 0.8755 1.0000 1.000e+00
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6
## Standard deviation 49.21 38.2824 37.3785 30.8316 29.7876 1.234e-13
## Proportion of Variance 0.34 0.2058 0.1962 0.1335 0.1246 0.000e+00
## Cumulative Proportion 0.34 0.5458 0.7419 0.8754 1.0000 1.000e+00
pca_4d_synap
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
combined_df <- bind_rows(
as_tibble(norm_5d_cyto$data_pca_swept, rownames = "set"),
as_tibble(norm_5d_synap$data_pca_swept, rownames = "set"),
as_tibble(norm_4d_cyto$data_pca_swept, rownames = "set"),
as_tibble(norm_4d_synap$data_pca_swept, rownames = "set")
)
# set as rownames again, drop column, and fill NAs with 0
combined_mat <- combined_df %>%
column_to_rownames("set") %>%
as.matrix()
combined_mat[is.na(combined_mat)] <- 0
pca_combined <- run_pca_analysis(data_matrix = combined_mat,
plot_title = "PCA complete dataset 4d + 5d",
na_method = "bPCA + median-sweep + missing replaced with 0")
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6
## Standard deviation 55.7254 40.5661 24.69283 21.19612 13.78064 11.24598
## Proportion of Variance 0.4259 0.2257 0.08363 0.06162 0.02605 0.01735
## Cumulative Proportion 0.4259 0.6516 0.73524 0.79687 0.82291 0.84026
## PC7 PC8 PC9 PC10 PC11 PC12 PC13
## Standard deviation 11.06375 9.81358 9.63166 9.1582 9.01806 8.74694 8.38317
## Proportion of Variance 0.01679 0.01321 0.01272 0.0115 0.01115 0.01049 0.00964
## Cumulative Proportion 0.85705 0.87026 0.88298 0.8945 0.90564 0.91613 0.92577
## PC14 PC15 PC16 PC17 PC18 PC19 PC20
## Standard deviation 8.01507 7.86970 7.74196 7.60455 7.36456 7.16044 7.07138
## Proportion of Variance 0.00881 0.00849 0.00822 0.00793 0.00744 0.00703 0.00686
## Cumulative Proportion 0.93458 0.94308 0.95130 0.95923 0.96667 0.97370 0.98056
## PC21 PC22 PC23 PC24
## Standard deviation 6.97961 6.92075 6.71909 5.094e-14
## Proportion of Variance 0.00668 0.00657 0.00619 0.000e+00
## Cumulative Proportion 0.98724 0.99381 1.00000 1.000e+00
pca_combined$scree_plot / pca_combined$pca_plot + plot_layout(guides = "collect")
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
limma_5d_cyto <- run_limma_subset(data_mat = norm_5d_cyto$data_swept,
batch = "5d",
fraction = "cyto")
## Running limma for 5d_cyto
limma_5d_synap <- run_limma_subset(data_mat = norm_5d_synap$data_swept,
batch = "5d",
fraction = "synap")
## Running limma for 5d_synap
limma_4d_cyto <- run_limma_subset(data_mat = norm_4d_cyto$data_swept,
batch = "4d",
fraction = "cyto")
## Running limma for 4d_cyto
limma_4d_synap <- run_limma_subset(data_mat = norm_4d_synap$data_swept,
batch = "4d",
fraction = "synap")
## Running limma for 4d_synap
# Volcano plots
volcano_5d_cyto <- plot_volcano_limma(limma_5d_cyto)
volcano_5d_cyto
volcano_5d_synap <- plot_volcano_limma(limma_5d_synap)
volcano_5d_synap
volcano_4d_cyto <- plot_volcano_limma(limma_4d_cyto)
volcano_4d_cyto
volcano_4d_synap <- plot_volcano_limma(limma_4d_synap)
volcano_4d_synap
cyto_4d <- read.csv("limma_4d_cyto.csv",
row.names = 1,
stringsAsFactors = FALSE)
synap_4d <- read.csv("limma_4d_synap.csv",
row.names = 1,
stringsAsFactors = FALSE)
cyto_5d <- read.csv("limma_5d_cyto.csv",
row.names = 1,
stringsAsFactors = FALSE)
synap_5d <- read.csv("limma_5d_synap.csv",
row.names = 1,
stringsAsFactors = FALSE)
get_up_genes <- function(df) {
listed <- df$first_gene[df$category == "upregulated"]
return(listed)
}
get_down_genes <- function(df) {
listed <- df$first_gene[df$category == "downregulated"]
return(listed)
}
cyto_4d_up <- get_up_genes(cyto_4d)
synap_4d_up <- get_up_genes(synap_4d)
cyto_5d_up <- get_up_genes(cyto_5d)
synap_5d_up <- get_up_genes(synap_5d)
cyto_4d_down <- get_down_genes(cyto_4d)
synap_4d_down <- get_down_genes(synap_4d)
cyto_5d_down <- get_down_genes(cyto_5d)
synap_5d_down <- get_down_genes(synap_5d)
gene_sets_up <- list(
`4d up cyto` = cyto_4d_up,
`4d up synap` = synap_4d_up,
`5d up cyto` = cyto_5d_up,
`5d up synap` = synap_5d_up
)
gene_sets_down <- list(
`4d down cyto` = cyto_4d_down,
`4d down synap` = synap_4d_down,
`5d down cyto` = cyto_5d_down,
`5d down synap` = synap_5d_down
)
# Fit Euler/Venn model
fit_up <- euler(gene_sets_up)
fit_down <- euler(gene_sets_down)
# Plot
Vennup <- plot(
fit_up,
fills = list(fill = vennfillup, alpha = 0.75),
labels = list(col = "black", cex = 0.8),
edges = list(col = nsfill),
quantities = list(type = "counts", cex = 0.7),
main = "Overlap of upregulated genes in 4d and 5d synap and cyto"
)
Venndown <- plot(
fit_down,
fills = list(fill = vennfilldown, alpha = 0.75),
labels = list(col = "black", cex = 0.8),
edges = list(col = nsfill),
quantities = list(type = "counts", cex = 0.7),
main = "Overlap of downregulated genes in 4d and 5d synap and cyto"
)
Vennup
Venndown
# Convert to incidence data frame
up_df <- fromList(gene_sets_up)
down_df <- fromList(gene_sets_down)
upset_up <- upset(
up_df,
nsets = 4,
nintersects = 11,
sets = colnames(up_df),
order.by = "degree",
keep.order = TRUE,
matrix.color = upfill,
main.bar.color = screefill,
sets.bar.color = screefill,
shade.color = nsfill,
mainbar.y.label = "Intersecting upregulated genes",
sets.x.label = "Genes per subset",
set_size.show = TRUE,
text.scale = c(1.3, 1, 1.3, 1.1, 1.3, 1.1)
)
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## ℹ The deprecated feature was likely used in the UpSetR package.
## Please report the issue to the authors.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## ℹ The deprecated feature was likely used in the UpSetR package.
## Please report the issue to the authors.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
upset_down <- upset(
down_df,
nsets = 4,
nintersects = 11,
sets = colnames(down_df),
order.by = "degree",
keep.order = TRUE,
matrix.color = downfill,
main.bar.color = screefill,
sets.bar.color = screefill,
shade.color = nsfill,
mainbar.y.label = "Intersecting downregulated genes",
sets.x.label = "Genes per subset",
set_size.show = TRUE,
text.scale = c(1.3, 1, 1.3, 1.1, 1.3, 1.1)
)
upset_up
upset_down